# @Time : 2021/10/25 15:13
# @Author : Li Kunlun
# @Description : 对博客程序进行爬虫

import requests
from bs4 import BeautifulSoup

# Candidate front-page URLs for pages 1..50.
# NOTE(review): "#p{page}" is a URI fragment — fragments are client-side only
# and are never sent to the server (RFC 3986), so all 50 of these URLs fetch
# the exact same front page. Real pagination needs the site's actual paging
# endpoint; confirm against cnblogs' URL scheme before relying on this list.
urls = [f"https://www.cnblogs.com/#p{page}" for page in range(1, 50 + 1)]


# Producer: download the raw HTML for one URL.
def craw(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Page address to download.
        timeout: Seconds to wait for the server (default 10). Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body as a string.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If the request exceeds *timeout* seconds.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of handing an error body to parse().
    r.raise_for_status()
    return r.text


# Consumer: extract post links from a downloaded page.
def parse(html):
    """Parse cnblogs front-page HTML into a list of post links.

    Args:
        html: Raw HTML text, as returned by ``craw``.

    Returns:
        A list of ``(href, title)`` tuples, one per
        ``<a class="post-item-title">`` anchor found in the document.
        Empty list if the page contains no such anchors.
    """
    soup = BeautifulSoup(html, "html.parser")
    # "a" is the anchor tag; post titles carry the "post-item-title" class.
    links = soup.find_all("a", class_="post-item-title")
    return [(link["href"], link.get_text()) for link in links]


if __name__ == '__main__':
    # Demo run: fetch a single page and print each (href, title) pair.
    # NOTE(review): only urls[2] is crawled here — the other 49 URLs built
    # above go unused in this demo.
    for result in parse(craw(urls[2])):
        print(result)
